import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
# import seaborn as sns
import datetime
from plotly.offline import download_plotlyjs, init_notebook_mode, plot, iplot
from plotly.graph_objs import Scatter, Figure, Layout
import plotly
import plotly.graph_objs as go
import plotly.express as px
from IPython.display import Markdown as md
init_notebook_mode(connected=False)
import io
import requests
import re
Authors and sources mentioned: Editore/Autore del dataset: Dipartimento della Protezione Civile. Categoria ISO 19115: Salute. Dati forniti dal Ministero della Salute.
Regional data files (Dati per Regione):
- Struttura file giornaliero: dpc-covid19-ita-regioni-yyyymmdd.csv (dpc-covid19-ita-regioni-20200224.csv)
- File complessivo: dpc-covid19-ita-regioni.csv
- File ultimi dati (latest): dpc-covid19-ita-regioni-latest.csv
Below we use the complete file, 'dpc-covid19-ita-regioni.csv', which has so far been updated on a daily basis.
In addition, we take each region's population size from https://it.wikipedia.org/wiki/Regione_(Italia) in order to compute per-capita measures.
# Download the Italian Wikipedia page on regions and parse every HTML table on it.
URL = 'https://it.wikipedia.org/wiki/Regione_(Italia)'
res = requests.get(URL, timeout=30)
# Fail loudly on an HTTP error instead of trying to parse an error page.
res.raise_for_status()
# Wrap the markup in a file-like object: passing a literal HTML string to
# read_html is deprecated in recent pandas releases.
tables = pd.read_html(io.StringIO(res.text))
# NOTE(review): the table is picked by a hard-coded position; presumably this is
# the regions/population table - verify if the page layout changes.
dt = tables[13]
def dewhite(x):
    """Return the digits of *x* concatenated into one string.

    Every non-digit character (spaces, separators, letters) is dropped.

    Args:
        x: Text to extract digits from.

    Returns:
        A string made only of the digits found in ``x``, in their original
        order; the empty string if ``x`` has no digits.
    """
    return ''.join(re.findall(r'\d+', x))
# Keep only the region name and population columns, under English names.
dt2 = dt[['Regione', 'Popolazione (ab.)']].copy()
dt2.columns = ['Region', 'Pop']
# Keep only the digits of each population entry and convert to int.
# The pattern is a raw string so that "\d" is not an invalid escape sequence.
dt2['Pop'] = dt2['Pop'].apply(lambda x: ''.join(re.findall(r'\d+', x))).astype(int)
# Download the complete regional time series published by the DPC on GitHub.
response = requests.get(
    "https://raw.githubusercontent.com/pcm-dpc/COVID-19/master/dati-regioni/dpc-covid19-ita-regioni.csv",
    timeout=30,
)
# Fail loudly on an HTTP error instead of parsing an error page as CSV.
response.raise_for_status()
s = response.content
dat = pd.read_csv(io.StringIO(s.decode('utf-8')))
# Largest value of the 'data' column, shown below as the data's "as of" date.
cdate = dat['data'].max()
md("Currently data as of date: {}".format(cdate))
Currently data as of date: 2021-04-15T17:00:00
# Render the list of raw column names as Markdown.
md(f"All column names: {dat.columns.tolist()}")
All column names: ['data', 'stato', 'codice_regione', 'denominazione_regione', 'lat', 'long', 'ricoverati_con_sintomi', 'terapia_intensiva', 'totale_ospedalizzati', 'isolamento_domiciliare', 'totale_positivi', 'variazione_totale_positivi', 'nuovi_positivi', 'dimessi_guariti', 'deceduti', 'casi_da_sospetto_diagnostico', 'casi_da_screening', 'totale_casi', 'tamponi', 'casi_testati', 'note', 'ingressi_terapia_intensiva', 'note_test', 'note_casi', 'totale_positivi_test_molecolare', 'totale_positivi_test_antigenico_rapido', 'tamponi_test_molecolare', 'tamponi_test_antigenico_rapido', 'codice_nuts_1', 'codice_nuts_2']
# Build the working frame `df`: drop unused columns, rename the Italian column
# names to English, attach population and index the rows by date.
#
# Columns are renamed by name rather than by position, so a column added or
# reordered upstream cannot silently shift the labels. Columns not listed here
# keep their original names.
_COLUMN_RENAMES = {
    'data': 'Date',
    'denominazione_regione': 'Region',
    'lat': 'Lat',
    'long': 'Long',
    'ricoverati_con_sintomi': 'HospWithSymptoms',
    'terapia_intensiva': 'IC',
    'totale_ospedalizzati': 'HospTotal',
    'isolamento_domiciliare': 'AtHome',
    'totale_positivi': 'CurrentlyPositive',
    'variazione_totale_positivi': 'VariationOfPositives',
    'nuovi_positivi': 'NewPositives',
    'dimessi_guariti': 'Recovered',
    'deceduti': 'Deaths',
    'casi_da_sospetto_diagnostico': 'Diagnostico',
    'casi_da_screening': 'Screening',
    'totale_casi': 'TotalCases',
    'tamponi': 'NoOfTests',
    'ingressi_terapia_intensiva': 'ingr_ter_intens',
}
df = dat.drop(['stato', 'codice_regione'], axis=1).rename(columns=_COLUMN_RENAMES)
# Inner join on the region name: rows whose Region has no exact match in dt2
# are dropped.
# NOTE(review): confirm that every region name in the DPC file matches the
# spelling used in the population table.
df = pd.merge(df, dt2, on='Region')
# Keep a plain-date 'Date' column and use a DatetimeIndex (also named 'Date')
# so the frame can be filtered with date strings.
df['Date'] = pd.to_datetime(df['Date']).dt.date
df = df.set_index(df["Date"])
df.index = pd.to_datetime(df.index)
# Negative daily counts are replaced by their absolute value.
df['NewPositives'] = df['NewPositives'].abs()
dat.tail(5)
| data | stato | codice_regione | denominazione_regione | lat | long | ricoverati_con_sintomi | terapia_intensiva | totale_ospedalizzati | isolamento_domiciliare | ... | note | ingressi_terapia_intensiva | note_test | note_casi | totale_positivi_test_molecolare | totale_positivi_test_antigenico_rapido | tamponi_test_molecolare | tamponi_test_antigenico_rapido | codice_nuts_1 | codice_nuts_2 | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 8752 | 2021-04-15T17:00:00 | ITA | 19 | Sicilia | 38.115697 | 13.362357 | 1218 | 184 | 1402 | 23372 | ... | NaN | 10.0 | NaN | NaN | 191973.0 | 0.0 | 2142063.0 | 1361480.0 | ITG | ITG1 |
| 8753 | 2021-04-15T17:00:00 | ITA | 9 | Toscana | 43.769231 | 11.255889 | 1633 | 286 | 1919 | 24867 | ... | NaN | 21.0 | NaN | NaN | 210734.0 | 2497.0 | 3125205.0 | 651022.0 | ITI | ITI1 |
| 8754 | 2021-04-15T17:00:00 | ITA | 10 | Umbria | 43.106758 | 12.388247 | 258 | 35 | 293 | 3354 | ... | Si fa presente che 10 dei ricoveri NON UTI, no... | 3.0 | NaN | NaN | 52846.0 | 0.0 | 823225.0 | 244027.0 | ITI | ITI2 |
| 8755 | 2021-04-15T17:00:00 | ITA | 2 | Valle d'Aosta | 45.737503 | 7.320149 | 61 | 12 | 73 | 1019 | ... | NaN | 0.0 | NaN | NaN | 9887.0 | 382.0 | 85855.0 | 19961.0 | ITC | ITC2 |
| 8756 | 2021-04-15T17:00:00 | ITA | 5 | Veneto | 45.434905 | 12.338452 | 1396 | 244 | 1640 | 26692 | ... | Nei valori riportati per le terapie intensive ... | 12.0 | NaN | NaN | 388843.0 | 10000.0 | 4784639.0 | 1878885.0 | ITH | ITH3 |
5 rows × 30 columns
# Show the last five rows of the prepared frame.
df.tail(5)
| Date | Region | Lat | Long | HospWithSymptoms | IC | HospTotal | AtHome | CurrentlyPositive | VariationOfPositives | ... | ingr_ter_intens | note_test | note_casi | totale_positivi_test_molecolare | totale_positivi_test_antigenico_rapido | tamponi_test_molecolare | tamponi_test_antigenico_rapido | codice_nuts_1 | codice_nuts_2 | Pop | |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| Date | |||||||||||||||||||||
| 2021-04-11 | 2021-04-11 | Veneto | 45.434905 | 12.338452 | 1542 | 289 | 1831 | 29976 | 31807 | -728 | ... | 13.0 | NaN | NaN | 385464.0 | 9743.0 | 4725501.0 | 1814651.0 | ITH | ITH3 | 4879133 |
| 2021-04-12 | 2021-04-12 | Veneto | 45.434905 | 12.338452 | 1568 | 286 | 1854 | 29833 | 31687 | -120 | ... | 10.0 | NaN | NaN | 386126.0 | 9668.0 | 4734330.0 | 1817569.0 | ITH | ITH3 | 4879133 |
| 2021-04-13 | 2021-04-13 | Veneto | 45.434905 | 12.338452 | 1534 | 269 | 1803 | 28756 | 30559 | -1128 | ... | 18.0 | NaN | NaN | 386762.0 | 9915.0 | 4747307.0 | 1840383.0 | ITH | ITH3 | 4879133 |
| 2021-04-14 | 2021-04-14 | Veneto | 45.434905 | 12.338452 | 1461 | 260 | 1721 | 27721 | 29442 | -1117 | ... | 10.0 | NaN | NaN | 387796.0 | 9962.0 | 4766501.0 | 1861663.0 | ITH | ITH3 | 4879133 |
| 2021-04-15 | 2021-04-15 | Veneto | 45.434905 | 12.338452 | 1396 | 244 | 1640 | 26692 | 28332 | -1110 | ... | 12.0 | NaN | NaN | 388843.0 | 10000.0 | 4784639.0 | 1878885.0 | ITH | ITH3 | 4879133 |
5 rows × 29 columns
# Daily new positive cases, one line per region.
df2 = df  # alias, not a copy: columns added to df2 also appear on df
fig = px.line(
    df2,
    x="Date",
    y="NewPositives",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Daily new positive cases")
fig.show()
# 7-day moving average of daily new positives.
# The window is applied within each region: df2 holds all regions in a single
# frame, so a plain .rolling() over the column would average rows that belong
# to different regions.
# Assumes rows are in chronological order within each region.
df2['MovAv7'] = df2.groupby('Region')['NewPositives'].transform(
    lambda cases: cases.rolling(window=7).mean()
)
fig = px.line(df2[df2.index > '2020-3-1'], x="Date", y="MovAv7", color="Region",
              hover_name="Region", render_mode="svg", log_y=False)
fig.update_layout(title="7-day MA of new positive cases")
fig.show()
# Daily new positives per 100,000 inhabitants, smoothed with a 7-day moving
# average.
df2['NewPos_per_100K'] = df2['NewPositives'] / df2['Pop'] * 100_000
# Smooth within each region: df2 holds all regions in a single frame, so a
# plain .rolling() would average rows that belong to different regions.
# Assumes rows are in chronological order within each region.
df2['NewPos_per_100K'] = df2.groupby('Region')['NewPos_per_100K'].transform(
    lambda rate: rate.rolling(window=7).mean()
)
fig = px.line(df2[df2.index > '2020-3-1'], x="Date", y="NewPos_per_100K", color="Region",
              hover_name="Region", log_y=False)
fig.update_layout(title="7-day MA of new positive cases, per 100K")
fig.show()
# Percentage of daily tests that were positive.
# NOTE(review): 'NoOfTests' (the 'tamponi' column) looks like a running total,
# so daily positives are divided by the per-region day-over-day increase in
# tests rather than by the cumulative count - confirm against the dataset docs.
daily_tests = df2.groupby('Region')['NoOfTests'].diff()
# Days with no (or negative, i.e. corrected) new tests give an undefined ratio;
# mask them so they become NaN instead of inf or a negative percentage.
daily_tests = daily_tests.where(daily_tests > 0)
df2['PosTests'] = df2['NewPositives'] / daily_tests * 100
fig = px.scatter(df2, y="PosTests", x="Date", color="Region",
                 hover_name="Region", log_y=True)
fig.update_layout(title="Percentage of positive tests")
fig.show()
# Intensive-care patients per 100,000 inhabitants, one line per region.
df2['IC_per_100K'] = df2['IC'].div(df2['Pop']).mul(100_000)
fig = px.line(
    df2,
    x="Date",
    y="IC_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current number of intensive care patients, per 100K")
fig.show()
# Hospitalized patients per 100,000 inhabitants, one line per region.
df2['Hosp_per_100K'] = df2['HospTotal'].div(df2['Pop']).mul(100_000)
fig = px.line(
    df2,
    x="Date",
    y="Hosp_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    log_y=False,
).update_layout(title="Current number of hospitalized, per 100K")
fig.show()
# Daily deaths: difference between each day's 'Deaths' value and the previous
# row of the same region.
df3 = df2.copy()
previous_deaths = df3.groupby(['Region'])['Deaths'].transform('shift')
df3['NewDeaths'] = df3['Deaths'] - previous_deaths
fig = px.bar(
    df3,
    x=df3['Date'],
    y="NewDeaths",
    color="Region",
    hover_name="Date",
).update_layout(title="Daily number of deaths")
fig.show()
# 'Deaths' per 100,000 inhabitants, one spline-shaped line per region.
df2['Deaths_per_100K'] = df2['Deaths'].div(df2['Pop']).mul(100_000)
fig = px.line(
    df2,
    x="Date",
    y="Deaths_per_100K",
    color="Region",
    hover_name="Region",
    render_mode="svg",
    line_shape='spline',
).update_layout(title="Cumulative number of deaths, per 100K")
fig.show()
# Change in currently-positive cases per 100,000 inhabitants, smoothed with a
# 7-day moving average.
df2['Change_per_100K'] = df2['VariationOfPositives'] / df2['Pop'] * 100_000
# Smooth within each region: df2 holds all regions in a single frame, so a
# plain .rolling() would average rows that belong to different regions.
# Assumes rows are in chronological order within each region.
df2['Change_per_100K'] = df2.groupby('Region')['Change_per_100K'].transform(
    lambda change: change.rolling(window=7).mean()
)
fig = px.line(df2[(df2.index > '2020-3-1') & (df2['Region'] != """Valle d'Aosta""")],
              x='Date', y="Change_per_100K", color="Region", hover_name="Date")
fig.update_layout(title="7-day MA of change in current positive cases, per 100K (excl. Valle d'Aosta)")
fig.show()
# Currently-positive cases per 100,000 inhabitants, smoothed with a 14-day
# moving average.
df2['Current_per_100K'] = df2['CurrentlyPositive'] / df2['Pop'] * 100_000
# Smooth within each region: df2 holds all regions in a single frame, so a
# plain .rolling() would average rows that belong to different regions.
# Assumes rows are in chronological order within each region.
df2['Current_per_100K'] = df2.groupby('Region')['Current_per_100K'].transform(
    lambda current: current.rolling(window=14).mean()
)
fig = px.line(df2[(df2.index > '2020-3-7')], x='Date', y="Current_per_100K",
              color="Region", hover_name="Date")
fig.update_layout(title="14-day MA of current positive cases, per 100K")
fig.show()
# Totals per date over all regions present in df, plotted on a log scale.
df2 = df
# Sum numeric columns only: the frame also holds text and date columns, which
# cannot be summed meaningfully and make .sum() raise on recent pandas.
df_sum = (
    df2.drop(['Lat', 'Long'], axis=1)
    .groupby(df.Date)
    .sum(numeric_only=True)
    .reset_index()
)
# Long format: one row per (Date, variable) so each measure gets its own line.
df_sum2 = pd.melt(df_sum, id_vars=['Date'],
                  value_vars=['NewPositives', 'IC', 'HospTotal', 'CurrentlyPositive'])
fig = px.line(df_sum2, x="Date", y="value", color='variable', hover_name="value",
              render_mode="svg", log_y=True, line_shape='spline')
fig.update_layout(title="Number of new and current positives, current IC patients and currently hospitalized")
fig.show()
# Ratio of 'totale_positivi_test_molecolare' to 'tamponi_test_molecolare',
# as a percentage.
df2['pos.test.rate.mol'] = df2['totale_positivi_test_molecolare'] / df2['tamponi_test_molecolare'] * 100
fig = px.line(df2[(df2.index > '2021-1-10') & (df2['Region'] != """Valle d'Aosta""")],
              x='Date', y="pos.test.rate.mol", color="Region", hover_name="Date")
# The previous title was copied from the "change in current positives" chart
# and did not describe this plot.
fig.update_layout(title="Positive rate of molecular tests, % (excl. Valle d'Aosta)")
fig.show()
# Ratio of 'totale_positivi_test_antigenico_rapido' to
# 'tamponi_test_antigenico_rapido', as a percentage.
df2['pos.test.rate.ant'] = df2['totale_positivi_test_antigenico_rapido'] / df2['tamponi_test_antigenico_rapido'] * 100
fig = px.line(df2[(df2.index > '2021-1-10') & (df2['Region'] != """Valle d'Aosta""")],
              x='Date', y="pos.test.rate.ant", color="Region", hover_name="Date")
# The previous title was copied from the "change in current positives" chart
# and did not describe this plot.
fig.update_layout(title="Positive rate of rapid antigen tests, % (excl. Valle d'Aosta)")
fig.show()